{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\"\"\"\n",
    " Deeper Multi-Layer Perceptron with XAVIER Init\n",
    " Xavier init from {Project: https://github.com/aymericdamien/TensorFlow-Examples/}\n",
    " @Sungjoon Choi (sungjoon.choi@cpslab.snu.ac.kr)\n",
    "\"\"\"\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import matplotlib.pyplot as plt\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "%matplotlib inline  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting data/train-images-idx3-ubyte.gz\n",
      "Extracting data/train-labels-idx1-ubyte.gz\n",
      "Extracting data/t10k-images-idx3-ubyte.gz\n",
      "Extracting data/t10k-labels-idx1-ubyte.gz\n"
     ]
    }
   ],
   "source": [
    "# Load the MNIST splits from data/ with one-hot encoded labels\n",
    "mnist = input_data.read_data_sets(train_dir='data/', one_hot=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Xavier Init\n",
    "def xavier_init(n_inputs, n_outputs, uniform=True):\n",
    "  \"\"\"Return a Xavier (Glorot) initializer scaled by fan-in plus fan-out.\n",
    "\n",
    "  The scheme aims to keep the scale of the gradients roughly the same in\n",
    "  all layers. Reference: Xavier Glorot and Yoshua Bengio (2010),\n",
    "  Understanding the difficulty of training deep feedforward neural\n",
    "  networks, International conference on artificial intelligence and\n",
    "  statistics.\n",
    "\n",
    "  Args:\n",
    "    n_inputs: The number of input nodes into each output.\n",
    "    n_outputs: The number of output nodes for each input.\n",
    "    uniform: If true use a uniform distribution, otherwise use a\n",
    "      truncated normal.\n",
    "  Returns:\n",
    "    An initializer.\n",
    "  \"\"\"\n",
    "  fan_total = n_inputs + n_outputs\n",
    "  if not uniform:\n",
    "    # 3 (rather than 6) gives approximately the same limits as the uniform\n",
    "    # case, because the truncated normal repicks values lying more than\n",
    "    # 2 standard deviations from the mean.\n",
    "    sigma = tf.sqrt(3.0 / fan_total)\n",
    "    return tf.truncated_normal_initializer(stddev=sigma)\n",
    "  # Uniform over [-limit, limit]; 6 is the constant used in the paper.\n",
    "  limit = tf.sqrt(6.0 / fan_total)\n",
    "  return tf.random_uniform_initializer(-limit, limit)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Network Ready\n"
     ]
    }
   ],
   "source": [
    "# Optimisation hyper-parameters\n",
    "learning_rate   = 1e-3\n",
    "training_epochs = 50\n",
    "batch_size      = 100\n",
    "display_step    = 1     # log every `display_step` epochs\n",
    "\n",
    "# Layer widths\n",
    "n_input    = 28 * 28   # flattened MNIST image (28x28 pixels)\n",
    "n_hidden_1 = 256       # units in hidden layer 1\n",
    "n_hidden_2 = 256       # units in hidden layer 2\n",
    "n_hidden_3 = 256       # units in hidden layer 3\n",
    "n_hidden_4 = 256       # units in hidden layer 4\n",
    "n_classes  = 10        # MNIST digit classes 0-9\n",
    "\n",
    "# Graph inputs: image batch, one-hot label batch, dropout keep probability\n",
    "x = tf.placeholder(tf.float32, [None, n_input])\n",
    "y = tf.placeholder(tf.float32, [None, n_classes])\n",
    "dropout_keep_prob = tf.placeholder(tf.float32)\n",
    "\n",
    "# Create model\n",
    "def multilayer_perceptron(_X, _weights, _biases, _keep_prob):\n",
    "    \"\"\"Forward pass of the four-hidden-layer MLP; returns unnormalised logits.\n",
    "\n",
    "    Each hidden layer computes dropout(relu(matmul(input, W) + b), _keep_prob),\n",
    "    using the 'h1'..'h4' entries of _weights and 'b1'..'b4' entries of _biases.\n",
    "    The output layer is a plain affine map using the 'out' entries.\n",
    "    \"\"\"\n",
    "    activation = _X\n",
    "    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2'), ('h3', 'b3'), ('h4', 'b4')):\n",
    "        pre_activation = tf.add(tf.matmul(activation, _weights[w_key]), _biases[b_key])\n",
    "        activation = tf.nn.dropout(tf.nn.relu(pre_activation), _keep_prob)\n",
    "    # No softmax here: the loss (softmax_cross_entropy_with_logits) applies it to these logits\n",
    "    return tf.matmul(activation, _weights['out']) + _biases['out']\n",
    "\n",
    "# Trainable parameters: Xavier-initialised weight matrices, standard-normal biases\n",
    "weights = {\n",
    "    name: tf.get_variable(name, shape=[fan_in, fan_out], initializer=xavier_init(fan_in, fan_out))\n",
    "    for name, fan_in, fan_out in [\n",
    "        ('h1', n_input, n_hidden_1),\n",
    "        ('h2', n_hidden_1, n_hidden_2),\n",
    "        ('h3', n_hidden_2, n_hidden_3),\n",
    "        ('h4', n_hidden_3, n_hidden_4),\n",
    "        ('out', n_hidden_4, n_classes),\n",
    "    ]\n",
    "}\n",
    "biases = {\n",
    "    name: tf.Variable(tf.random_normal([width]))\n",
    "    for name, width in [\n",
    "        ('b1', n_hidden_1),\n",
    "        ('b2', n_hidden_2),\n",
    "        ('b3', n_hidden_3),\n",
    "        ('b4', n_hidden_4),\n",
    "        ('out', n_classes),\n",
    "    ]\n",
    "}\n",
    "\n",
    "# Construct model: `pred` holds the unnormalised class scores (logits)\n",
    "pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)\n",
    "\n",
    "# Define loss and optimizer.\n",
    "# logits/labels are passed by keyword: TensorFlow >= 1.0 raises a ValueError for\n",
    "# positional arguments here, while the keyword form is accepted by older releases too.\n",
    "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) # Softmax loss\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer\n",
    "# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost) # Momentum alternative\n",
    "\n",
    "# Accuracy: fraction of rows whose arg-max logit matches the arg-max of the one-hot label\n",
    "correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n",
    "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
    "\n",
    "# Initializing the variables\n",
    "init = tf.initialize_all_variables()\n",
    "\n",
    "print (\"Network Ready\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 000/050 cost: 0.418188300\n",
      "Training accuracy: 0.970\n",
      "Epoch: 001/050 cost: 0.135868739\n",
      "Training accuracy: 0.960\n",
      "Epoch: 002/050 cost: 0.095398066\n",
      "Training accuracy: 0.980\n",
      "Epoch: 003/050 cost: 0.075697627\n",
      "Training accuracy: 0.970\n",
      "Epoch: 004/050 cost: 0.061306230\n",
      "Training accuracy: 0.980\n",
      "Epoch: 005/050 cost: 0.052467199\n",
      "Training accuracy: 0.970\n",
      "Epoch: 006/050 cost: 0.046108751\n",
      "Training accuracy: 0.970\n",
      "Epoch: 007/050 cost: 0.040648738\n",
      "Training accuracy: 1.000\n",
      "Epoch: 008/050 cost: 0.034662794\n",
      "Training accuracy: 0.980\n",
      "Epoch: 009/050 cost: 0.030895058\n",
      "Training accuracy: 0.980\n",
      "Epoch: 010/050 cost: 0.027404196\n",
      "Training accuracy: 0.990\n",
      "Epoch: 011/050 cost: 0.025599543\n",
      "Training accuracy: 0.990\n",
      "Epoch: 012/050 cost: 0.022524802\n",
      "Training accuracy: 0.990\n",
      "Epoch: 013/050 cost: 0.020978481\n",
      "Training accuracy: 1.000\n",
      "Epoch: 014/050 cost: 0.018992848\n",
      "Training accuracy: 1.000\n",
      "Epoch: 015/050 cost: 0.018180760\n",
      "Training accuracy: 1.000\n",
      "Epoch: 016/050 cost: 0.015807947\n",
      "Training accuracy: 1.000\n",
      "Epoch: 017/050 cost: 0.015066529\n",
      "Training accuracy: 1.000\n",
      "Epoch: 018/050 cost: 0.013667117\n",
      "Training accuracy: 0.990\n",
      "Epoch: 019/050 cost: 0.012571550\n",
      "Training accuracy: 1.000\n",
      "Epoch: 020/050 cost: 0.011941066\n",
      "Training accuracy: 0.990\n",
      "Epoch: 021/050 cost: 0.011349857\n",
      "Training accuracy: 0.990\n",
      "Epoch: 022/050 cost: 0.010185919\n",
      "Training accuracy: 1.000\n",
      "Epoch: 023/050 cost: 0.010433348\n",
      "Training accuracy: 1.000\n",
      "Epoch: 024/050 cost: 0.009212835\n",
      "Training accuracy: 1.000\n",
      "Epoch: 025/050 cost: 0.008935386\n",
      "Training accuracy: 0.990\n",
      "Epoch: 026/050 cost: 0.007533159\n",
      "Training accuracy: 1.000\n",
      "Epoch: 027/050 cost: 0.008139531\n",
      "Training accuracy: 1.000\n",
      "Epoch: 028/050 cost: 0.007539478\n",
      "Training accuracy: 1.000\n",
      "Epoch: 029/050 cost: 0.007227370\n",
      "Training accuracy: 1.000\n",
      "Epoch: 030/050 cost: 0.007473362\n",
      "Training accuracy: 1.000\n",
      "Epoch: 031/050 cost: 0.006676663\n",
      "Training accuracy: 0.980\n",
      "Epoch: 032/050 cost: 0.005731412\n",
      "Training accuracy: 1.000\n",
      "Epoch: 033/050 cost: 0.005602606\n",
      "Training accuracy: 0.990\n",
      "Epoch: 034/050 cost: 0.005471543\n",
      "Training accuracy: 1.000\n",
      "Epoch: 035/050 cost: 0.005467167\n",
      "Training accuracy: 1.000\n",
      "Epoch: 036/050 cost: 0.006077243\n",
      "Training accuracy: 1.000\n",
      "Epoch: 037/050 cost: 0.005801255\n",
      "Training accuracy: 1.000\n",
      "Epoch: 038/050 cost: 0.005572326\n",
      "Training accuracy: 1.000\n",
      "Epoch: 039/050 cost: 0.005355799\n",
      "Training accuracy: 1.000\n",
      "Epoch: 040/050 cost: 0.004890651\n",
      "Training accuracy: 1.000\n",
      "Epoch: 041/050 cost: 0.004345889\n",
      "Training accuracy: 1.000\n",
      "Epoch: 042/050 cost: 0.004596357\n",
      "Training accuracy: 1.000\n",
      "Epoch: 043/050 cost: 0.003729049\n",
      "Training accuracy: 1.000\n",
      "Epoch: 044/050 cost: 0.004191519\n",
      "Training accuracy: 1.000\n",
      "Epoch: 045/050 cost: 0.004694648\n",
      "Training accuracy: 1.000\n",
      "Epoch: 046/050 cost: 0.003776975\n",
      "Training accuracy: 1.000\n",
      "Epoch: 047/050 cost: 0.004078514\n",
      "Training accuracy: 0.990\n",
      "Epoch: 048/050 cost: 0.003377332\n",
      "Training accuracy: 1.000\n",
      "Epoch: 049/050 cost: 0.003732143\n",
      "Training accuracy: 1.000\n",
      "Optimization Finished!\n"
     ]
    }
   ],
   "source": [
    "# Launch the graph\n",
    "sess = tf.Session()\n",
    "sess.run(init)\n",
    "\n",
    "# Mini-batches per epoch (same for every epoch)\n",
    "total_batch = mnist.train.num_examples // batch_size\n",
    "\n",
    "# Training cycle\n",
    "for epoch in range(training_epochs):\n",
    "    avg_cost = 0.\n",
    "    for i in range(total_batch):\n",
    "        batch_xs, batch_ys = mnist.train.next_batch(batch_size)\n",
    "        train_feed = {x: batch_xs, y: batch_ys, dropout_keep_prob: 0.7}\n",
    "        eval_feed = {x: batch_xs, y: batch_ys, dropout_keep_prob: 1.}\n",
    "        # One optimisation step with dropout active (keep probability 0.7)\n",
    "        sess.run(optimizer, feed_dict=train_feed)\n",
    "        # Running mean of the loss, re-evaluated after the step with keep probability 1\n",
    "        avg_cost += sess.run(cost, feed_dict=eval_feed)/total_batch\n",
    "    # Only log on every `display_step`-th epoch\n",
    "    if epoch % display_step != 0:\n",
    "        continue\n",
    "    print (\"Epoch: %03d/%03d cost: %.9f\" % (epoch, training_epochs, avg_cost))\n",
    "    # Note: accuracy is measured on the last mini-batch of the epoch only\n",
    "    train_acc = sess.run(accuracy, feed_dict=eval_feed)\n",
    "    print (\"Training accuracy: %.3f\" % (train_acc))\n",
    "\n",
    "print (\"Optimization Finished!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training accuracy: 0.982\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the MNIST test split with dropout keep probability 1 (no units dropped)\n",
    "test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob:1.})\n",
    "# This figure comes from mnist.test, so label it as test (not training) accuracy\n",
    "print (\"Test accuracy: %.3f\" % (test_acc))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
